{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "<div align='center'>多元回归分析(MLR)</div>"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [],
   "source": [
    "import os\n",
    "import pandas as pd\n",
    "import numpy as np\n",
    "import matplotlib.pyplot as plt\n",
    "import statsmodels.api as sm\n",
    "import statsmodels.formula.api as smf\n",
    "\n",
    "from scipy import stats\n",
    "\n",
    "import warnings\n",
    "\n",
    "warnings.filterwarnings('ignore')\n",
    "    \n",
    "%matplotlib inline\n",
    "\n",
    "plt.style.use('ggplot')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>不良贷款\n",
       "(亿元）</th>\n",
       "      <th>各项贷款余额\n",
       "(亿元)</th>\n",
       "      <th>本年累计应收贷款\n",
       "(亿元)</th>\n",
       "      <th>贷款项目个数\n",
       "(个)</th>\n",
       "      <th>本年固定资产投资额\n",
       "(亿元)</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>20</th>\n",
       "      <td>11.6</td>\n",
       "      <td>368.2</td>\n",
       "      <td>16.8</td>\n",
       "      <td>32</td>\n",
       "      <td>163.9</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>21</th>\n",
       "      <td>1.6</td>\n",
       "      <td>95.7</td>\n",
       "      <td>3.8</td>\n",
       "      <td>10</td>\n",
       "      <td>44.5</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>22</th>\n",
       "      <td>1.2</td>\n",
       "      <td>109.6</td>\n",
       "      <td>10.3</td>\n",
       "      <td>14</td>\n",
       "      <td>67.9</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>23</th>\n",
       "      <td>7.2</td>\n",
       "      <td>196.2</td>\n",
       "      <td>15.8</td>\n",
       "      <td>16</td>\n",
       "      <td>39.7</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>24</th>\n",
       "      <td>3.2</td>\n",
       "      <td>102.2</td>\n",
       "      <td>12.0</td>\n",
       "      <td>10</td>\n",
       "      <td>97.1</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "    不良贷款\\n(亿元）  各项贷款余额\\n(亿元)  本年累计应收贷款\\n(亿元)  贷款项目个数\\n(个)  本年固定资产投资额\\n(亿元)\n",
       "20        11.6         368.2            16.8           32            163.9\n",
       "21         1.6          95.7             3.8           10             44.5\n",
       "22         1.2         109.6            10.3           14             67.9\n",
       "23         7.2         196.2            15.8           16             39.7\n",
       "24         3.2         102.2            12.0           10             97.1"
      ]
     },
     "execution_count": 2,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# 贾俊平: 多元回归章节\n",
    "db = '/home/lidong/Datasets/'\n",
    "bad_loans_df = pd.read_excel(os.path.join(db, \"Statistics/bad-loans.xls\"), usecols=\"B:F\")\n",
    "bad_loans_df[-5:]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [],
   "source": [
    "# 有问题Buglist\n",
    "# bad_loans_df.columns\n",
    "# formula_model = smf.ols(\n",
    "#     formula='不良贷款\\n(亿元）~各项贷款余额\\n(亿元)+本年累计应收贷款\\n(亿元)+贷款项目个数\\n(个)+本年固定资产投资额\\n(亿元)',\n",
    "#     data=bad_loans_df)\n",
    "# \n",
    "# formula_model_result = formula_model.fit()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<table class=\"simpletable\">\n",
       "<caption>OLS Regression Results</caption>\n",
       "<tr>\n",
       "  <th>Dep. Variable:</th>        <td>不良贷款\n",
       "(亿元）</td>    <th>  R-squared:         </th> <td>   0.798</td>\n",
       "</tr>\n",
       "<tr>\n",
       "  <th>Model:</th>                   <td>OLS</td>       <th>  Adj. R-squared:    </th> <td>   0.757</td>\n",
       "</tr>\n",
       "<tr>\n",
       "  <th>Method:</th>             <td>Least Squares</td>  <th>  F-statistic:       </th> <td>   19.70</td>\n",
       "</tr>\n",
       "<tr>\n",
       "  <th>Date:</th>             <td>Tue, 25 Dec 2018</td> <th>  Prob (F-statistic):</th> <td>1.04e-06</td>\n",
       "</tr>\n",
       "<tr>\n",
       "  <th>Time:</th>                 <td>22:02:27</td>     <th>  Log-Likelihood:    </th> <td> -47.082</td>\n",
       "</tr>\n",
       "<tr>\n",
       "  <th>No. Observations:</th>      <td>    25</td>      <th>  AIC:               </th> <td>   104.2</td>\n",
       "</tr>\n",
       "<tr>\n",
       "  <th>Df Residuals:</th>          <td>    20</td>      <th>  BIC:               </th> <td>   110.3</td>\n",
       "</tr>\n",
       "<tr>\n",
       "  <th>Df Model:</th>              <td>     4</td>      <th>                     </th>     <td> </td>   \n",
       "</tr>\n",
       "<tr>\n",
       "  <th>Covariance Type:</th>      <td>nonrobust</td>    <th>                     </th>     <td> </td>   \n",
       "</tr>\n",
       "</table>\n",
       "<table class=\"simpletable\">\n",
       "<tr>\n",
       "         <td></td>           <th>coef</th>     <th>std err</th>      <th>t</th>      <th>P>|t|</th>  <th>[0.025</th>    <th>0.975]</th>  \n",
       "</tr>\n",
       "<tr>\n",
       "  <th>const</th>          <td>   -1.0216</td> <td>    0.782</td> <td>   -1.306</td> <td> 0.206</td> <td>   -2.654</td> <td>    0.610</td>\n",
       "</tr>\n",
       "<tr>\n",
       "  <th>各项贷款余额\n",
       "(亿元)</th>    <td>    0.0400</td> <td>    0.010</td> <td>    3.837</td> <td> 0.001</td> <td>    0.018</td> <td>    0.062</td>\n",
       "</tr>\n",
       "<tr>\n",
       "  <th>本年累计应收贷款\n",
       "(亿元)</th>  <td>    0.1480</td> <td>    0.079</td> <td>    1.879</td> <td> 0.075</td> <td>   -0.016</td> <td>    0.312</td>\n",
       "</tr>\n",
       "<tr>\n",
       "  <th>贷款项目个数\n",
       "(个)</th>     <td>    0.0145</td> <td>    0.083</td> <td>    0.175</td> <td> 0.863</td> <td>   -0.159</td> <td>    0.188</td>\n",
       "</tr>\n",
       "<tr>\n",
       "  <th>本年固定资产投资额\n",
       "(亿元)</th> <td>   -0.0292</td> <td>    0.015</td> <td>   -1.937</td> <td> 0.067</td> <td>   -0.061</td> <td>    0.002</td>\n",
       "</tr>\n",
       "</table>\n",
       "<table class=\"simpletable\">\n",
       "<tr>\n",
       "  <th>Omnibus:</th>       <td> 0.316</td> <th>  Durbin-Watson:     </th> <td>   2.626</td>\n",
       "</tr>\n",
       "<tr>\n",
       "  <th>Prob(Omnibus):</th> <td> 0.854</td> <th>  Jarque-Bera (JB):  </th> <td>   0.442</td>\n",
       "</tr>\n",
       "<tr>\n",
       "  <th>Skew:</th>          <td> 0.220</td> <th>  Prob(JB):          </th> <td>   0.802</td>\n",
       "</tr>\n",
       "<tr>\n",
       "  <th>Kurtosis:</th>      <td> 2.520</td> <th>  Cond. No.          </th> <td>    352.</td>\n",
       "</tr>\n",
       "</table><br/><br/>Warnings:<br/>[1] Standard Errors assume that the covariance matrix of the errors is correctly specified."
      ],
      "text/plain": [
       "<class 'statsmodels.iolib.summary.Summary'>\n",
       "\"\"\"\n",
       "                            OLS Regression Results                            \n",
       "==============================================================================\n",
       "Dep. Variable:              不良贷款\n",
       "(亿元）   R-squared:                       0.798\n",
       "Model:                            OLS   Adj. R-squared:                  0.757\n",
       "Method:                 Least Squares   F-statistic:                     19.70\n",
       "Date:                Tue, 25 Dec 2018   Prob (F-statistic):           1.04e-06\n",
       "Time:                        22:02:27   Log-Likelihood:                -47.082\n",
       "No. Observations:                  25   AIC:                             104.2\n",
       "Df Residuals:                      20   BIC:                             110.3\n",
       "Df Model:                           4                                         \n",
       "Covariance Type:            nonrobust                                         \n",
       "==================================================================================\n",
       "                     coef    std err          t      P>|t|      [0.025      0.975]\n",
       "----------------------------------------------------------------------------------\n",
       "const             -1.0216      0.782     -1.306      0.206      -2.654       0.610\n",
       "各项贷款余额\n",
       "(亿元)        0.0400      0.010      3.837      0.001       0.018       0.062\n",
       "本年累计应收贷款\n",
       "(亿元)      0.1480      0.079      1.879      0.075      -0.016       0.312\n",
       "贷款项目个数\n",
       "(个)         0.0145      0.083      0.175      0.863      -0.159       0.188\n",
       "本年固定资产投资额\n",
       "(亿元)    -0.0292      0.015     -1.937      0.067      -0.061       0.002\n",
       "==============================================================================\n",
       "Omnibus:                        0.316   Durbin-Watson:                   2.626\n",
       "Prob(Omnibus):                  0.854   Jarque-Bera (JB):                0.442\n",
       "Skew:                           0.220   Prob(JB):                        0.802\n",
       "Kurtosis:                       2.520   Cond. No.                         352.\n",
       "==============================================================================\n",
       "\n",
       "Warnings:\n",
       "[1] Standard Errors assume that the covariance matrix of the errors is correctly specified.\n",
       "\"\"\""
      ]
     },
     "execution_count": 4,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# 不良贷款 \n",
    "y = bad_loans_df.iloc[:, 0]\n",
    "# 各项贷款余额 + 本年累计应收贷款 + 贷款项目个数 + 本年固定资产投资额\n",
    "x = bad_loans_df.iloc[:, [1,2,3,4]]\n",
    "x = sm.add_constant(x)\n",
    "\n",
    "model_result = sm.OLS(endog=y, exog=x).fit()\n",
    "model_result.summary()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "调整后的多重判定系数: Adj. R-squared\n",
    "\n",
    "$R_\\alpha^2 = 1 - (1 - R^2)(\\dfrac{n - 1}{n - k -1})$\n",
    "\n",
    "$R_\\alpha^2$为75.7%, 它的意义是, 用样本量和自变量个数对$R^2$调整后, 在Y的变差中, 能被多元回归方程解释的比例为75.7%\n",
    "\n",
    "-----\n",
    "\n",
    "观察几个自变量的p值, 只有**贷款余额** 0.001 < 0.005, 说明只有它的影响是显著的, 其他3个自变量对预测**不良贷款**的作用不大.\n",
    "\n",
    "| 自变量 | P值 |\n",
    "|:------:|:----:|\n",
    "|贷款余额 (亿元)      | 0.001  |\n",
    "|累计应收贷款 (亿元)  | 0.075  |\n",
    "|贷款项目个数 (个)        | 0.0145 |\n",
    "|固定资产投资额 (亿元)| 0.067  |"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "观察**本年固定资产投资额 (亿元)**系数-0.0292, 和实际有些矛盾, 只是由于**多重共线**导致的问题, 可以单独对该自变量做一元回归."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<table class=\"simpletable\">\n",
       "<caption>OLS Regression Results</caption>\n",
       "<tr>\n",
       "  <th>Dep. Variable:</th>            <td>y</td>        <th>  R-squared:         </th> <td>   0.269</td>\n",
       "</tr>\n",
       "<tr>\n",
       "  <th>Model:</th>                   <td>OLS</td>       <th>  Adj. R-squared:    </th> <td>   0.237</td>\n",
       "</tr>\n",
       "<tr>\n",
       "  <th>Method:</th>             <td>Least Squares</td>  <th>  F-statistic:       </th> <td>   8.458</td>\n",
       "</tr>\n",
       "<tr>\n",
       "  <th>Date:</th>             <td>Tue, 25 Dec 2018</td> <th>  Prob (F-statistic):</th>  <td>0.00792</td>\n",
       "</tr>\n",
       "<tr>\n",
       "  <th>Time:</th>                 <td>22:02:28</td>     <th>  Log-Likelihood:    </th> <td> -63.137</td>\n",
       "</tr>\n",
       "<tr>\n",
       "  <th>No. Observations:</th>      <td>    25</td>      <th>  AIC:               </th> <td>   130.3</td>\n",
       "</tr>\n",
       "<tr>\n",
       "  <th>Df Residuals:</th>          <td>    23</td>      <th>  BIC:               </th> <td>   132.7</td>\n",
       "</tr>\n",
       "<tr>\n",
       "  <th>Df Model:</th>              <td>     1</td>      <th>                     </th>     <td> </td>   \n",
       "</tr>\n",
       "<tr>\n",
       "  <th>Covariance Type:</th>      <td>nonrobust</td>    <th>                     </th>     <td> </td>   \n",
       "</tr>\n",
       "</table>\n",
       "<table class=\"simpletable\">\n",
       "<tr>\n",
       "      <td></td>         <th>coef</th>     <th>std err</th>      <th>t</th>      <th>P>|t|</th>  <th>[0.025</th>    <th>0.975]</th>  \n",
       "</tr>\n",
       "<tr>\n",
       "  <th>Intercept</th> <td>    0.9800</td> <td>    1.136</td> <td>    0.863</td> <td> 0.397</td> <td>   -1.370</td> <td>    3.330</td>\n",
       "</tr>\n",
       "<tr>\n",
       "  <th>x4</th>        <td>    0.0466</td> <td>    0.016</td> <td>    2.908</td> <td> 0.008</td> <td>    0.013</td> <td>    0.080</td>\n",
       "</tr>\n",
       "</table>\n",
       "<table class=\"simpletable\">\n",
       "<tr>\n",
       "  <th>Omnibus:</th>       <td>10.580</td> <th>  Durbin-Watson:     </th> <td>   2.047</td>\n",
       "</tr>\n",
       "<tr>\n",
       "  <th>Prob(Omnibus):</th> <td> 0.005</td> <th>  Jarque-Bera (JB):  </th> <td>   8.772</td>\n",
       "</tr>\n",
       "<tr>\n",
       "  <th>Skew:</th>          <td> 1.224</td> <th>  Prob(JB):          </th> <td>  0.0125</td>\n",
       "</tr>\n",
       "<tr>\n",
       "  <th>Kurtosis:</th>      <td> 4.559</td> <th>  Cond. No.          </th> <td>    128.</td>\n",
       "</tr>\n",
       "</table><br/><br/>Warnings:<br/>[1] Standard Errors assume that the covariance matrix of the errors is correctly specified."
      ],
      "text/plain": [
       "<class 'statsmodels.iolib.summary.Summary'>\n",
       "\"\"\"\n",
       "                            OLS Regression Results                            \n",
       "==============================================================================\n",
       "Dep. Variable:                      y   R-squared:                       0.269\n",
       "Model:                            OLS   Adj. R-squared:                  0.237\n",
       "Method:                 Least Squares   F-statistic:                     8.458\n",
       "Date:                Tue, 25 Dec 2018   Prob (F-statistic):            0.00792\n",
       "Time:                        22:02:28   Log-Likelihood:                -63.137\n",
       "No. Observations:                  25   AIC:                             130.3\n",
       "Df Residuals:                      23   BIC:                             132.7\n",
       "Df Model:                           1                                         \n",
       "Covariance Type:            nonrobust                                         \n",
       "==============================================================================\n",
       "                 coef    std err          t      P>|t|      [0.025      0.975]\n",
       "------------------------------------------------------------------------------\n",
       "Intercept      0.9800      1.136      0.863      0.397      -1.370       3.330\n",
       "x4             0.0466      0.016      2.908      0.008       0.013       0.080\n",
       "==============================================================================\n",
       "Omnibus:                       10.580   Durbin-Watson:                   2.047\n",
       "Prob(Omnibus):                  0.005   Jarque-Bera (JB):                8.772\n",
       "Skew:                           1.224   Prob(JB):                       0.0125\n",
       "Kurtosis:                       4.559   Cond. No.                         128.\n",
       "==============================================================================\n",
       "\n",
       "Warnings:\n",
       "[1] Standard Errors assume that the covariance matrix of the errors is correctly specified.\n",
       "\"\"\""
      ]
     },
     "execution_count": 5,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "bad_loans_df.columns = ['y', 'x1', 'x2', 'x3', 'x4']\n",
    "formula_model_fitted = smf.ols(formula='y ~ x4', data=bad_loans_df).fit()\n",
    "formula_model_fitted.summary()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "通过对**固定资产投资额**单独进行一元回归, 系数为正, 证实**多重共线**对回归有影响(这里对系数的正负符号有影响)"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.6.7"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
